library(ggplot2)
library(tidyverse)
library(dplyr)
library(harmony)
library(Seurat)
# Import seurat object
seur.human <- readRDS("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/raw_data/human_data/filtered_seurat_Harmony_07-22-22.RDS")
# Sanity check
print(seur.human) # 79,801 cells (it's the whole seurat object)
## An object of class Seurat
## 34778 features across 79801 samples within 2 assays
## Active assay: SCT (17389 features, 3000 variable features)
## 1 other assay present: RNA
## 3 dimensional reductions calculated: pca, umap, harmony
# Quick visualization
DimPlot(seur.human)
#_______________
# PREPROCESSING #
#________________
Preprocess <- function(seuratobject, SCTsplit=T, res=0.5, harmony_vars= c("Method", "Batch"), printComputation=T){
# Extract the counts data (from RNA assay) and metadata
counts <- GetAssayData(object = seuratobject, slot = "counts", assay="RNA")
metadata <- seuratobject@meta.data
metadata[, c("nCount_SCT", "nFeature_SCT", "SCT_snn_res.0.7", "seurat_clusters", "SCT_snn_res.0.9", "SCT_snn_res.1")] <- NULL # remove some cluster columns to avoid any confusion
colnames(metadata)[14] <- "clusters_bigUMAP" # rename "new_clusters" column
# Create new seurat object with only the counts and the "cleaned" metadata
initseurat <- CreateSeuratObject(counts = counts, meta.data = metadata, min.cells=20) # keep only genes expressed in at least 20 cells
cat("Initial seurat object:\n")
print(initseurat) # this is for sanity check
if(SCTsplit==T){
cat("\n1-Perform SCTransform on split data\n")
# Normalize with SCTransform by splitting object (per batch)
seur.list <- SplitObject(initseurat, split.by = "orig.ident")
for(i in 1:length(seur.list)){
cat("...SCTransform on", names(seur.list)[i], "\n")
seur.list[[i]] <- SCTransform(seur.list[[i]], vars.to.regress = 'percent.mt', verbose=printComputation, seed.use = 1448145)
}
# seur.list <- lapply(X = seur.list, FUN = SCTransform, vars.to.regress="percent.mt", method="glmGamPois") # not working
seur.SCT <- merge(seur.list[[1]], y = unlist(seur.list[-1], use.names=FALSE), merge.data = TRUE)
sct.hvg <- SelectIntegrationFeatures(object.list = seur.list, nfeatures = 3000)
VariableFeatures(seur.SCT) <- sct.hvg
}
else if(SCTsplit==F){
cat("\n1-Perform SCTransform on merged data")
# Normalize with SCTransform (without splitting object)
seur.SCT <- SCTransform(initseurat,
assay="RNA",
new.assay.name="SCT",
vars.to.regress = 'percent.mt',
seed.use = 1448145,
verbose=printComputation)
}
# Run PCA (don't scale data when using SCTransform)
cat("\n2-Run PCA pre-integration")
seur.SCT <- RunPCA(object = seur.SCT, assay = "SCT", npcs = 50, seed.use=42, verbose=printComputation)
p1 <- DimPlot(seur.SCT, reduction = "pca", group.by = "orig.ident") + ggtitle("PCA pre-integration")
ElbowPlot(seur.SCT)
cat("\nSeurat object pre-integration:\n")
print(seur.SCT)
# Run Harmony for integration
cat("\n3-Run Harmony\n")
seur.harmony <- RunHarmony(object = seur.SCT,
assay.use = "SCT",
reduction = "pca",
dims.use = 1:50,
max.iter.harmony=30,
group.by.vars = harmony_vars,
plot_convergence = printComputation)
cat("\nSeurat object post-integration:\n")
print(seur.harmony)
cat("\n4-Run UMAP & clustering\n")
seur.harmony <- RunUMAP(object = seur.harmony, dims = 1:15, assay = "SCT", reduction = "harmony", verbose=printComputation, seed.use=42)
seur.harmony <- FindNeighbors(object = seur.harmony, dims = 1:15, assay = "SCT", reduction = "harmony", verbose=printComputation) %>%
FindClusters(resolution = res, verbose=printComputation, random.seed=42)
# Show PCA after integration
p2 <- DimPlot(seur.harmony, reduction = "harmony", group.by = "orig.ident") + ggtitle("PCA post-integration")
print(p1 | p2)
return(seur.harmony)
}
#__________________
# GENE SIGNATURES #
#__________________
# Gene signatures from Park paper
# DN_score <- list(c("AC002454.1", "CD7", "IGLL1", "RCBTB2", "SMIM24", "TPM4", "C1orf228", "CTHRC1", "CKLF",
# "SELL", "MLLT11", "CDK6", "MZB1", "SPNS3", "IFITM1", "CD34", "CD82", "SERPINB1", "LPAR6", "ETV5")) # negative control (DNearly)
# DN_score <- list(c("PTCRA", "MAL", "JCHAIN", "CD1E", "ADA", "UGT3A2", "LRRC28", "IL7R", "LIMS2", "RP11-404O13.5",
# "TRDC", "ATP6AP1L", "ID1", "SELL", "GALNT2", "AC011893.3", "ADGRG1", "TFDP2", "MSI2", "CD7")) # negative control (DNQ)
DPq_score <- list(c("SH3TC1", "CD8B", "RAG2", "CD1B", "ELOVL4", "RAG1", "LYST", "LTB", "SMPD3", "SIT1",
"AQP3", "CD38", "PTCRA", "CD8A", "NFATC3", "CD52", "RP11-620J15.3", "ARPP21", "H1FX", "AEBP1"))
DPp_score <- list(c("SMPD3", "CD8A", "CD1E", "UHRF1", "RP11-144L1.4", "SH3TC1", "CD8B", "CD1B", "CD1A", "ELOVL4",
"TCF7", "SH2D1A", "ANAPC15", "ACOT7", "AQP3", "TTK", "SMC2", "PITPNM2", "HNRNPR", "CHRNA3"))
Cd8aa1_score <- list(c("NUCB2", "CD27", "LEF1", "PTPN7", "TRAC", "PRKCH", "CD28", "LCP2", "CD2", "IKZF1", "AQP3", "CD8A",
"GNG4", "SH3BGRL3", "LIMD2", "ELOVL5", "TOX", "SH2D1A", "CAPZA1", "XCL1"))
Cd8aa2_score <- list(c("CTSW", "TRGC2", "SMC4", "CD7", "IKZF2", "ZNF683", "ACTN1", "SMIM24", "HCST", "MME", "CD8A",
"TRG-AS1", "RGS2", "RTKN2", "PPP2R5C", "ID3", "TRGC1", "CLEC2D", "DCXR", "ELF1", "CPNE7"))
NKT_score <- list(c("GZMK", "NKG7", "CST7", "GZMM", "XCL1", "SAMD3", "CCL5", "GIMAP4", "ITM2C", "CD8A",
"CLEC2B", "IL32", "TRGC1", "HSPD1", "TRDC", "LITAF", "HSPA1B", "CD44", "PYHIN1", "HSPA1A"))
SPentry_score <- list(c("SATB1", "TOX2", "CHI3L2", "CCR9", "ITM2A", "AIF1", "RPS4Y1", "TRAC", "ANKRD44", "FYB", "ODF2L",
"EPHB6", "CCT2", "BANF1", "POLR2J3", "ANXA6", "NUDC", "MAD1L1", "CD1A", "CD1E"))
Tago_score <- list(c("UBE2F", "DHRS3", "MAGEH1", "ITM2A", "C1orf228", "SMS", "BACH2", "TNFRSF1B", "COTL1",
"SERPINE2", "NREP", "LCP2", "CD247", "LAT", "ITGAE", "VASP", "CREM", "CD40LG", "PHB", "CCND2"))
gdT_score <- list(c("TRDC", "ANXA1", "LAT", "SELL", "CCR9", "SMPD3", "FCGRT", "TESPA1", "SH3BP5", "AES", "TDG",
"LIMD2", "SEMA4D", "MYL6B", "CD247", "ARL4C", "ID3", "RTKN2", "MLLT11", "TRGC2"))
NK_score <- list(c("GNLY", "TYROBP", "KLRD1", "NKG7", "KLRC1", "CMC1", "KLRB1", "XCL2", "KLRF1", "HOPX",
"FCER1G", "CTSW", "PRF1", "MATK", "GZMB", "CCL3", "IL2RB", "IFITM2", "GZMH", "XCL1"))
Egress <- list(c("KLF2", "CORO1A", "CCR7", "CXCR4", "CXCR6", "FOXO1", "CXCR3", "S1PR1", "S1PR4",
"S100A4", "S100A6", "EMP3"))
# Function to look at these gene signatures
signatures <- function(seuratobject=NKT_SCTmerged, scorelist=DN_score, scorename="DN_score", celltype="NKT"){
# Add signatures to new seurat object
# NKT_SCTsplit_score <- AddModuleScore(object = NKT_SCTsplit, features = scorelist, assay = "SCT", name = scorename)
seurat_scored <- AddModuleScore(object = seuratobject, features = scorelist, assay = "SCT", name = scorename)
# SCpubr::do_FeaturePlot(sample = NKT_SCTsplit_score,
# features = paste0(scorename, "1"),
# plot.title = "NKT cells | SCTransform on split data",
# reduction = "umap",
# viridis_color_map = "inferno") |
SCpubr::do_FeaturePlot(sample = seurat_scored,
features = paste0(scorename, "1"),
plot.title = paste(celltype, "cells"),
reduction = "umap",
viridis_color_map = "inferno")
}
# Isolate MAIT cells
MAIT_Thymus <- subset(seur.human, ident = "MAIT_Thymus") # 4689 cells
# Quick QC
FeatureScatter(MAIT_Thymus, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch")
FeatureScatter(MAIT_Thymus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Batch")
# PREPROCESS
MAIT_SCTsplit <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = T, res = 0.3, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on split data
## ...SCTransform on MAIT_1_Thymus
## ...SCTransform on MAIT_2_Thymus
## ...SCTransform on MAIT_3_Thymus
##
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
MAIT_SCTmerged <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = F, res = 0.3, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
# SAVE OBJECT (for scClustEval)
# MAIT_Thymus@meta.data[, c("nCount_SCT", "nFeature_SCT", "SCT_snn_res.0.7", "seurat_clusters", "SCT_snn_res.0.9", "SCT_snn_res.1")] <- NULL # remove some cluster columns to avoid any confusion
# colnames(MAIT_Thymus@meta.data)[14] <- "clusters_bigUMAP" # rename "new_clusters" column
# saveRDS(MAIT_Thymus, "~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/07_scClustEval/input/mait_object_22-11-10.rds")
# Sanity checks
# DimPlot(MAIT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "Donor", label = T) + ggtitle("SCT on split data") |
# DimPlot(MAIT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "Donor", label = T) + ggtitle("SCT on merged data")
# Look at clusters
DimPlot(MAIT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.3", label = T) + ggtitle("SCT on split data") |
DimPlot(MAIT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.3", label = T) + ggtitle("SCT on merged data")
# Look at gene signatures
# signatures(scorelist=DN_score, scorename="DN_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=gdT_score, scorename="gdT_score") # negative control
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=DPq_score, scorename="DPq_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=DPp_score, scorename="DPp_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=Cd8aa1_score, scorename="CD8aa1_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=Cd8aa2_score, scorename="CD8aa2_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=NKT_score, scorename="NKT_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=SPentry_score, scorename="SPentry_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=Tago_score, scorename="Tagonist_score")
signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=Egress, scorename="Egress_score")
# signatures(seuratobject=MAIT_SCTmerged, celltype = "MAIT", scorelist=NK_score, scorename="NK_score") # several genes are not found
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/08_HumanMAITdata/mait_parksignatures_9.jpeg", width=9, height=7)
# Try different resolutions
MAIT_SCT_06 <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = F, res = 0.6, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
MAIT_SCT_08 <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = F, res = 0.8, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
MAIT_SCT_1 <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = F, res = 1, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
DimPlot(MAIT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.3", label = T) + ggtitle("res=0.3") |
DimPlot(MAIT_SCT_06, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.6", label = T) + ggtitle("res=0.6") |
DimPlot(MAIT_SCT_08, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.8", label = T) + ggtitle("res=0.8") |
DimPlot(MAIT_SCT_1, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.1", label = T) + ggtitle("res=1")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/mait_sctransform-merged_resolutions.jpeg", width=20, height=5)
# Isolate NKT cells
NKT_Thymus <- subset(seur.human, ident = "NKT_Thymus") # 2551 cells
# Quick QC
FeatureScatter(NKT_Thymus, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch")
FeatureScatter(NKT_Thymus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Batch")
# PREPROCESS
NKT_SCTsplit <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = T, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on split data
## ...SCTransform on NKT_1_Thymus
## ...SCTransform on NKT_2_Thymus
## ...SCTransform on NKT_3_Thymus
##
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
NKT_SCTmerged <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = F, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
# SAVE OBJECT (for scClustEval)
# NKT_Thymus@meta.data[, c("nCount_SCT", "nFeature_SCT", "SCT_snn_res.0.7", "seurat_clusters", "SCT_snn_res.0.9", "SCT_snn_res.1")] <- NULL # remove some cluster columns to avoid any confusion
# colnames(NKT_Thymus@meta.data)[14] <- "clusters_bigUMAP" # rename "new_clusters" column
# saveRDS(NKT_Thymus, "~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/07_scClustEval/input/nkt_object_22-11-10.rds")
# Sanity checks
# DimPlot(NKT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "Donor", label = T) + ggtitle("SCT on split data") |
# DimPlot(NKT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "Donor", label = T) + ggtitle("SCT on merged data")
# Look at clusters
DimPlot(NKT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on split data") |
DimPlot(NKT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on merged data")
# Look at gene signatures
# signatures(scorelist=DN_score, scorename="DN_score") # several genes are not found
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=gdT_score, scorename="gdT_score") # negative control
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_1.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=DPq_score, scorename="DPq_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_2.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=DPp_score, scorename="DPp_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_3.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=Cd8aa1_score, scorename="CD8aa1_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_4.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=Cd8aa2_score, scorename="CD8aa2_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_5.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=NKT_score, scorename="NKT_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_6.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=SPentry_score, scorename="SPentry_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_7.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=Tago_score, scorename="Tagonist_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_8.jpeg", width=9, height=7)
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=Egress, scorename="Egress_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_parksignatures_9.jpeg", width=9, height=7)
# signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=NK_score, scorename="NK_score") # several genes are not found
# Signatures of c1 vs c2 cells
c1_score <- list(c("CD4", "COTL1", "LEF1", "MAP3K1", "PLP2", "XCL1", "ICOS"))
c2_score <- list(c("GNLY", "GZMB", "GZMH", "KLRD1", "KLRF1", "KLRK1", "NKG7", "PRF1", "TYROBP", "DBN1", "CD161", "CD94", "CCR5"))
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=c1_score, scorename="c1_score")
signatures(seuratobject=NKT_SCTmerged, celltype = "NKT", scorelist=c2_score, scorename="c2_score")
# Try different resolutions
NKT_SCT_06 <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = F, res = 0.6, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
NKT_SCT_08 <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = F, res = 0.8, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
NKT_SCT_1 <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = F, res = 1, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
DimPlot(NKT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("res=0.4") |
DimPlot(NKT_SCT_06, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.6", label = T) + ggtitle("res=0.6") |
DimPlot(NKT_SCT_08, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.8", label = T) + ggtitle("res=0.8") |
DimPlot(NKT_SCT_1, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.1", label = T) + ggtitle("res=1")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/nkt_sctransform-merged_resolutions.jpeg", width=20, height=5)
# Isolate gdT cells
gdT_Thymus <- subset(seur.human, ident = "GD_Thymus") # 2981 cells
# Quick QC
FeatureScatter(gdT_Thymus, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch")
FeatureScatter(gdT_Thymus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Batch")
# PREPROCESS
gdT_SCTsplit <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = T, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on split data
## ...SCTransform on GD_1_Thymus
## ...SCTransform on GD_2_Thymus
##
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
gdT_SCTmerged <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = F, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
# SAVE OBJECT (for scClustEval)
# gdT_Thymus@meta.data[, c("nCount_SCT", "nFeature_SCT", "SCT_snn_res.0.7", "seurat_clusters", "SCT_snn_res.0.9", "SCT_snn_res.1")] <- NULL # remove some cluster columns to avoid any confusion
# colnames(gdT_Thymus@meta.data)[14] <- "clusters_bigUMAP" # rename "new_clusters" column
# saveRDS(gdT_Thymus, "~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/07_scClustEval/input/gdt_object_22-11-10.rds")
# Sanity checks
# DimPlot(seur.harmony, dims = 1:2, group.by = "orig.ident")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Sex")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Donor")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Batch")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Method")
# Look at clusters
DimPlot(gdT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on split data") |
DimPlot(gdT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on merged data")
# Look at gene signatures
# signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=DN_score, scorename="DN_score") # several genes are not found
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=gdT_score, scorename="gdT_score") # negative control
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_1.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=DPq_score, scorename="DPq_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_2.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=DPp_score, scorename="DPp_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_3.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=Cd8aa1_score, scorename="CD8aa1_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_4.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=Cd8aa2_score, scorename="CD8aa2_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_5.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=NKT_score, scorename="NKT_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_6.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=SPentry_score, scorename="SPentry_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_7.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=Tago_score, scorename="Tagonist_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_8.jpeg", width=9, height=7)
signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=Egress, scorename="Egress_score")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/09_HumanGDTdata/gdt_parksignatures_9.jpeg", width=9, height=7)
# signatures(seuratobject=gdT_SCTmerged, celltype = "gdT", scorelist=NK_score, scorename="NK_score") # several genes are not found
# rownames(seur.human)[grep("CD1", rownames(seur.human))]
# Try different resolutions
GDT_SCT_06 <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = F, res = 0.6, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
GDT_SCT_08 <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = F, res = 0.8, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
GDT_SCT_1 <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = F, res = 1, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering
DimPlot(gdT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("res=0.4") |
DimPlot(GDT_SCT_06, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.6", label = T) + ggtitle("res=0.6") |
DimPlot(GDT_SCT_08, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.8", label = T) + ggtitle("res=0.8") |
DimPlot(GDT_SCT_1, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.1", label = T) + ggtitle("res=1")
# ggsave("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/04_HumanNKTdata/gdt_sctransform-merged_resolutions.jpeg", width=20, height=5)